Classification Metrics

Machine learning
R
Python
Author

Tony D

Published

March 12, 2025

Classification Metrics Explained | Sensitivity, Precision, AUROC, & More

install packages

pip install -U scikit-learn
pip install -U kaggle
pip install -U kagglehub

load packages

import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    precision_score, recall_score, roc_curve,
    accuracy_score, f1_score, roc_auc_score,
    average_precision_score, confusion_matrix,
    precision_recall_curve
)

download data from Kaggle

import kagglehub

# download the latest version of the dataset (kagglehub caches the files locally)
path = kagglehub.dataset_download("uciml/pima-indians-diabetes-database")
print("Path to dataset files:", path)
Path to dataset files: /Users/jinchaoduan/.cache/kagglehub/datasets/uciml/pima-indians-diabetes-database/versions/1

show data files in the download folder

os.listdir(path)
['diabetes.csv']

read data

df = pd.read_csv(os.path.join(path, os.listdir(path)[0]))
df.head()
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  DiabetesPedigreeFunction  Age  Outcome
0            6      148             72             35        0  33.6                     0.627   50        1
1            1       85             66             29        0  26.6                     0.351   31        0
2            8      183             64              0        0  23.3                     0.672   32        1
3            1       89             66             23       94  28.1                     0.167   21        0
4            0      137             40             35      168  43.1                     2.288   33        1
df.Outcome.value_counts()
Outcome
0    500
1    268
Name: count, dtype: int64
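
The outcome is imbalanced: 500 negatives to 268 positives, roughly 65/35. That imbalance is the reason this post looks beyond plain accuracy at sensitivity, precision, and the ROC and PR curves.
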
# separate features from response
X = df.drop('Outcome', axis=1)
y = df['Outcome']
# split data into test and training sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
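# Because the classes are imbalanced, a stratified split keeps the same class
# proportions in both partitions. A minimal sketch of that alternative
# (hypothetical; the rest of the post uses the unstratified split above):
X_tr_s, X_te_s, y_tr_s, y_te_s = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)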
# initialize and train logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
LogisticRegression(max_iter=1000)
# predict on the test set and get the probas
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1] 
# quickly look at the distribution of the probas
percentiles = np.percentile(y_pred_proba, [5, 25, 50, 75, 95])
percentiles
array([0.03455652, 0.11989883, 0.29954411, 0.64776581, 0.87083353])
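
model.predict() applies a default 0.5 threshold to these probabilities, so the hard labels above can be reproduced by thresholding manually:

# sanity check: predict() is equivalent to cutting the probabilities at 0.5
y_pred_manual = (y_pred_proba > 0.5).astype(int)
(y_pred_manual == y_pred).all()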

confusion matrix

# generate confusion matrix
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.xticks([0.5, 1.5], ['No Diabetes', 'Diabetes'])
plt.yticks([0.5, 1.5], ['No Diabetes', 'Diabetes'], va='center')
plt.show()
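
scikit-learn puts true labels on the rows and predicted labels on the columns, so the cells read [[TN, FP], [FN, TP]]. Here that works out to 78 true negatives, 21 false positives, 18 false negatives, and 37 true positives, and every metric below is a ratio of these four counts.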

# recall / sensitivity
recall = recall_score(y_test, y_pred)
recall
0.6727272727272727
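
Recall (also called sensitivity or the true positive rate) is TP / (TP + FN) = 37 / 55: the model catches about 67% of the actual diabetes cases.
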
# precision / positive predictive value
precision = precision_score(y_test, y_pred)
precision
0.6379310344827587
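
Precision (positive predictive value) is TP / (TP + FP) = 37 / 58: about 64% of the cases flagged as diabetes really have it.
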
# specificity
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)
specificity
np.float64(0.7878787878787878)
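
Specificity (true negative rate) is TN / (TN + FP) = 78 / 99: about 79% of the non-diabetic cases are correctly cleared.
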
# accuracy
accuracy = accuracy_score(y_test, y_pred)
accuracy
0.7467532467532467
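
Accuracy is (TP + TN) / total = 115 / 154. Note that always predicting "no diabetes" would already score 99 / 154 ≈ 64% here, which is why accuracy alone is a weak summary on imbalanced data.
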
# f1
f1 = f1_score(y_test, y_pred)
f1
0.6548672566371682
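
F1 is the harmonic mean of precision and recall, 2PR / (P + R). As a quick check, all five scores can be recomputed from the four confusion-matrix counts:

# recompute each metric from the raw counts to confirm the formulas
tn, fp, fn, tp = cm.ravel()
recall_m = tp / (tp + fn)
precision_m = tp / (tp + fp)
print('recall:', recall_m)                # 0.6727...
print('precision:', precision_m)          # 0.6379...
print('specificity:', tn / (tn + fp))     # 0.7878...
print('accuracy:', (tp + tn) / cm.sum())  # 0.7467...
print('f1:', 2 * precision_m * recall_m / (precision_m + recall_m))  # 0.6548...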
# get ROC curve values
fpr, tpr, thresholds_roc = roc_curve(y_test, y_pred_proba)

# get PR curve values (note: precision_recall_curve reuses the names
# `precision` and `recall`, overwriting the scalar scores computed above)
precision, recall, thresholds_pr = precision_recall_curve(y_test, y_pred_proba)

# get areas under the curves
auroc = roc_auc_score(y_test, y_pred_proba)
pr_auc = average_precision_score(y_test, y_pred_proba)
# plot both curves
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
ax1.plot(fpr, tpr, color='darkorange', lw=2, label=f'AUC = {auroc:.2f}')
ax1.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate')
ax1.set_title('Receiver Operating Characteristic (ROC) Curve')
ax1.legend(loc="lower right")

# Plot Precision-Recall Curve
ax2.plot(recall, precision, color='purple', lw=2, label=f'PR-AUC = {pr_auc:.2f}')
ax2.set_xlabel('Recall')
ax2.set_ylabel('Precision')
ax2.set_title('Precision-Recall Curve')
ax2.legend(loc="lower left")

plt.show()
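
The dashed diagonal on the ROC panel is the no-skill line (AUROC = 0.5); the more the curve bows toward the top-left corner, the better. As a sketch of one common way to choose an operating threshold from the sweep (not part of the original walkthrough), Youden's J statistic picks the point that maximizes TPR − FPR:

# hypothetical add-on: threshold that maximizes Youden's J = TPR - FPR
j = tpr - fpr
best_threshold = thresholds_roc[np.argmax(j)]
best_threshold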

y_test.value_counts()
Outcome
0    99
1    55
Name: count, dtype: int64
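
The test set keeps the imbalance (99 negatives, 55 positives), which fixes the no-skill baseline of the PR curve: a classifier that labels everything positive gets precision equal to the prevalence.

# no-skill precision baseline for the PR curve = positive prevalence
baseline = y_test.mean()
baseline  # 55 / 154 ≈ 0.357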

Reference

https://www.youtube.com/watch?v=KdUrfY1yM0w

https://github.com/RichardOnData/YouTube/blob/main/Python%20Notebooks/classification_metrics.ipynb